Now let’s get plotting! I want to look intially for each year at the number of calls per tract versus other measures of the tract. Let’s start with 2010:
nyc_2010 %>%
drop_na(calls_per_person, total_pop) %>%
ggplot(mapping = aes(x = total_pop, y = calls_per_person)) +
geom_point()
Looking at this, we want to remove some outliers. Let’s suppose that we are only going to look at tracts that have more than 2000 residents.
nyc_2010 %>%
filter(total_pop > 2000) %>%
ggplot(mapping = aes(x = total_pop, y = calls_per_person)) +
geom_point()
Now let’s measure versus other tract measurements. We have a few to use:
Average rent:
nyc_2010 %>%
filter(total_pop > 2000) %>%
mutate(avg_rent = na_if(avg_rent, NaN)) %>%
drop_na(avg_rent, calls_per_person) %>%
ggplot(mapping = aes(x = avg_rent, y = calls_per_person)) +
geom_point()
Percent white:
nyc_2010 %>%
filter(total_pop > 2000) %>%
ggplot(mapping = aes(x = percent_white, y = calls_per_person)) +
geom_point()
Median household income:
nyc_2010 %>%
filter(total_pop > 2000) %>%
drop_na(calls_per_person, med_income) %>%
ggplot(mapping = aes(x = med_income, y = calls_per_person)) +
geom_point()
What I’m most interested in is the change from years in terms of calls.
reform_df <- function(data, year){
data %>%
mutate(
year = year,
rent_increase = if_else(
(nyc_2017$avg_rent > avg_rent),
true = TRUE,
false = FALSE
),
white_increase = if_else(
(nyc_2017$percent_white > percent_white),
true = TRUE,
false = FALSE
),
income_increase = if_else(
(nyc_2017$med_income > med_income),
true = TRUE,
false = FALSE
)
) %>%
select(-geometry, -num_calls)
}
nyc_2010 <- reform_df(nyc_2010, 2010)
nyc_2011 <- reform_df(nyc_2011, 2011)
nyc_2012 <- reform_df(nyc_2012, 2012)
nyc_2013 <- reform_df(nyc_2013, 2013)
nyc_2014 <- reform_df(nyc_2014, 2014)
nyc_2015 <- reform_df(nyc_2015, 2015)
nyc_2016 <- reform_df(nyc_2016, 2016)
nyc_2017 <- reform_df(nyc_2017, 2017)
full_nyc <-
nyc_2010 %>%
rbind(nyc_2011, nyc_2012, nyc_2013, nyc_2014, nyc_2015, nyc_2016, nyc_2017)
nyc_3pts <-
rbind(nyc_2010, nyc_2014, nyc_2017)
full_nyc %>%
filter(total_pop > 2000, rent_increase == TRUE) %>%
mutate(avg_rent = na_if(avg_rent, NaN)) %>%
drop_na(avg_rent, calls_per_person) %>%
arrange(fips, year) %>%
ggplot(mapping = aes(x = avg_rent, y = calls_per_person)) +
geom_point(aes(alpha = year), size = 2) +
geom_path(aes(alpha = year), size = 1) +
scale_alpha_continuous(breaks = seq(2010, 2017, 1), limits = c(2010, 2017))
full_nyc %>%
filter(total_pop > 2000, white_increase == TRUE) %>%
drop_na(percent_white, calls_per_person) %>%
arrange(fips, year) %>%
ggplot(mapping = aes(x = percent_white, y = calls_per_person)) +
geom_point(aes(alpha = year), size = 2) +
geom_path(aes(alpha = year), size = 1) +
scale_alpha_continuous(breaks = seq(2010, 2017, 1), limits = c(2010, 2017))
full_nyc %>%
filter(total_pop > 2000, income_increase == TRUE) %>%
drop_na(med_income, calls_per_person) %>%
arrange(fips, year) %>%
ggplot(mapping = aes(x = med_income, y = calls_per_person)) +
geom_point(aes(alpha = year), size = 2) +
geom_path(aes(alpha = year), size = 1) +
scale_alpha_continuous(breaks = seq(2010, 2017, 1), limits = c(2010, 2017))
inner_join(nyc_2010, nyc_2017, by = "fips") %>%
filter(total_pop.x > 2000, avg_rent.y > avg_rent.x) %>%
mutate(avg_rent.x = na_if(avg_rent.x, NaN)) %>%
drop_na(avg_rent.x, calls_per_person.x) %>%
ggplot(mapping = aes(x = avg_rent.x, y = calls_per_person.x)) +
geom_point(aes(color = "2010")) +
geom_point(aes(x = avg_rent.y, y = calls_per_person.y, color = "2017")) +
geom_segment(aes(xend = avg_rent.y, yend = calls_per_person.y))
inner_join(nyc_2010, nyc_2017, by = "fips") %>%
filter(total_pop.x > 2000, percent_white.y > percent_white.x) %>%
drop_na(percent_white.x, calls_per_person.x) %>%
ggplot(mapping = aes(x = percent_white.x, y = calls_per_person.x)) +
geom_point(aes(color = "2010")) +
geom_point(aes(x = percent_white.y, y = calls_per_person.y, color = "2017")) +
geom_segment(aes(xend = percent_white.y, yend = calls_per_person.y))
inner_join(nyc_2010, nyc_2017, by = "fips") %>%
filter(total_pop.x > 2000, med_income.y > med_income.x) %>%
drop_na(med_income.x, calls_per_person.x) %>%
ggplot(mapping = aes(x = med_income.x, y = calls_per_person.x)) +
geom_point(aes(color = "2010")) +
geom_point(aes(x = med_income.y, y = calls_per_person.y, color = "2017")) +
geom_segment(aes(xend = med_income.y, yend = calls_per_person.y))
inner_join(nyc_2010, nyc_2017, by = "fips") %>%
filter(total_pop.x > 2000, med_income.y > med_income.x) %>%
drop_na(med_income.x, calls_per_person.x) %>%
ggplot(mapping = aes(x = med_income.x, y = calls_per_person.x)) +
geom_segment(
aes(xend = med_income.y, yend = calls_per_person.y),
arrow = arrow(length = unit(0.2, "cm"))
)
filter_data <- function(data){
data %>%
filter(total_pop > 2000) %>%
drop_na(med_income)
}
filtered_2010 <- filter_data(nyc_2010)
filtered_2012 <- filter_data(nyc_2012)
filtered_2014 <- filter_data(nyc_2014)
filtered_2016 <- filter_data(nyc_2016)
filtered_2010 %>%
ggplot(aes(x = med_income, y = calls_per_person)) +
geom_point(aes(color = "2010")) +
geom_point(
data = filtered_2012,
aes(x = med_income, y = calls_per_person, color = "2012")
) +
geom_point(
data = filtered_2014,
aes(x = med_income, y = calls_per_person, color = "2014")
) +
geom_point(
data = filtered_2016,
aes(x = med_income, y = calls_per_person, color = "2016")
)
filtered_2010 %>%
select(fips, med_income_2010 = med_income, calls_2010 = calls_per_person) %>%
inner_join(
filtered_2012 %>%
select(fips, med_income_2012 = med_income, calls_2012 = calls_per_person),
by = "fips"
) %>%
inner_join(
filtered_2014 %>%
select(fips, med_income_2014 = med_income, calls_2014 = calls_per_person),
by = "fips"
) %>%
inner_join(
filtered_2016 %>%
select(fips, med_income_2016 = med_income, calls_2016 = calls_per_person),
by = "fips"
) %>%
ggplot() +
geom_segment(
aes(
x = med_income_2010,
y = calls_2010,
xend = med_income_2012,
yend = calls_2012
),
color = "light gray"
) +
geom_segment(
aes(
x = med_income_2012,
y = calls_2012,
xend = med_income_2014,
yend = calls_2014
),
color = "light gray"
) +
geom_segment(
aes(
x = med_income_2014,
y = calls_2014,
xend = med_income_2016,
yend = calls_2016
),
color = "light gray"
) +
geom_point(aes(x = med_income_2010, y = calls_2010, color = "2010")) +
geom_point(aes(x = med_income_2012, y = calls_2012, color = "2012")) +
geom_point(aes(x = med_income_2014, y = calls_2014, color = "2014")) +
geom_point(aes(x = med_income_2016, y = calls_2016, color = "2016")) +
scale_color_manual(
values = c("red3", "orange3", "yellow3", "springgreen4")
) +
theme()
inner_join(nyc_2010, nyc_2017, by = "fips") %>%
filter(total_pop.x > 2000, med_income.y > med_income.x) %>%
drop_na(med_income.x, calls_per_person.x) %>%
ggplot(mapping = aes(x = med_income.x, y = calls_per_person.x)) +
geom_segment(
aes(xend = med_income.y, yend = calls_per_person.y),
arrow = arrow(length = unit(0.2, "cm"))
) +
geom_label(
x = 200000,
y = 0.8,
label = "1 arrow = 1 census tract"
) +
annotate(
geom = "label",
x = 203000,
y = 0.5,
label = "2010",
color = "red",
size = 3
) +
annotate(
geom = "label",
x = 255000,
y = 0.3,
label = "2017",
color = "red",
size = 3
) +
scale_x_continuous(labels = scales::dollar) +
coord_fixed(ratio = 200000) +
theme_classic() +
labs(
title = "311 Calls increase with Median Income from 2010 to 2017",
subtitle = "Tracking 311 calls for Census Tracts in New York City",
x = "Median Annual Income",
y = "Calls per person",
caption = "Source: NYC Open Data, American Fact Finder"
)